From: Ian Jackson
Date: Fri, 6 Apr 2018 18:09:02 +0000 (+0100)
Subject: docs/gen-html-index: Extract titles from HTML documents
X-Git-Tag: archive/raspbian/4.11.1-1+rpi1~1^2~66^2~196
X-Git-Url: https://dgit.raspbian.org/%22http://www.example.com/cgi/%22/%22http:/www.example.com/cgi/%22?a=commitdiff_plain;h=7782db9260d4c6499458de4e8d9866bc0427e143;p=xen.git

docs/gen-html-index: Extract titles from HTML documents

Signed-off-by: Ian Jackson
Release-acked-by: Juergen Gross
Acked-by: Lars Kurth
---

diff --git a/docs/gen-html-index b/docs/gen-html-index
index e9792bf937..5b43b42a8c 100644
--- a/docs/gen-html-index
+++ b/docs/gen-html-index
@@ -10,6 +10,7 @@ use warnings;
 use Getopt::Long;
 use IO::File;
 use File::Basename;
+use HTML::TreeBuilder::XPath;
 
 Getopt::Long::Configure('bundling');
 
@@ -64,6 +65,18 @@ sub make_linktext ($) {
     return "$1($2)" if $l =~ m,^man/(.*)\.([0-9].*)\.html,;
     $l =~ s/.(?:html|txt)$//g;
     return $index{$l} if exists $index{$l};
+
+    my $from_html;
+    eval {
+        my $tree = new HTML::TreeBuilder::XPath;
+        my $f = "$outdir/$l.html";
+        open F, '<', $f or die "$l $f $!";
+        $tree->parse_file(\*F) or die;
+        close F;
+        $from_html = $tree->findvalue("/html/head/title");
+    };
+    return $from_html if $from_html;
+
     return basename($l);
 }
 